# Use this R-Chunk to import all your datasets!

Analysis

The collapse of the US housing bubble and subsequent financial crisis of 2008 led to the largest recession since the Great Depression. In this analysis, I will attempt to outline some of the casues of the housing bubble using various data visualization techniques in R.

Home Prices

The following charts show how rapidly home prices increased in the years leading up to the burst of the bubble. In just six years, home prices doubled in much of the US.

# Use this R-Chunk to plot & visualize your data!


case_shiller <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Case_Shiller.csv", col_names = c("Date", "Index_20", "Index_regions"), skip = 1) # import Case Shiller Data

case_shiller$Date <- as.Date(case_shiller$Date,
                        format = "%m/%d/%Y")

date_limits <- c("2000-01-01", "2015-01-01")
date_limits <- as.Date(date_limits)

max_price <- case_shiller %>% 
  filter(Date < as.Date("2015-01-01")) %>% 
  filter(Index_20 == max(Index_20))
max_price <- max_price[,1:2]
  

max_price_region <- case_shiller %>% 
  filter(Date < as.Date("2015-01-01")) %>% 
  filter(Index_regions == max(Index_regions))
max_price_region <- max_price_region[,c(1,3)]

datee <- c(max_price$Date, max_price_region$Date)
indexy <- c(max_price$Index_20, max_price_region$Index_regions)


x <- data.frame("Date" = datee, "Index" = indexy)

######################################################### PLOT
ggplot(data = case_shiller, aes(x = Date, y = Index_20 )) +
  geom_line(aes(y = case_shiller$Index_20), size = 1) +
  geom_line(aes(y = case_shiller$Index_regions), size = 1, color = "navy") +
  geom_line(size = 1, color = "black")  +
  labs(title = "Case-Shiller Home Price Indices", 
       x = "Year", 
       y = "Index", 
       caption = "Source: S&P Dow Jones Indices LLC") +
  coord_cartesian(xlim = date_limits) +
  scale_y_continuous(breaks = seq(0,230, by = 10)) +
  theme_bw() +
   theme( 
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_line(),
        plot.caption = element_text(hjust = 0)
        ) +
  geom_segment(
   aes(x=as.Date("2000-01-01"),
                  xend=as.Date(max_price$Date),
                  y=max_price$Index_20,
                  yend=max_price$Index_20), 
   linetype = "dashed", color = "red") +
  
  geom_segment(
    aes(x=as.Date(max_price$Date),
       xend=as.Date(max_price$Date),
       y=max_price$Index_20,
       yend=100), 
    linetype = "dashed", color = "red") +
  
  geom_segment(
    aes(x=as.Date("2000-01-01"),
        xend=as.Date(max_price_region$Date),
        y=max_price_region$Index_regions,
        yend=max_price_region$Index_regions), 
    linetype = "dashed", color = "red") +
  
  geom_segment(
    aes(x=as.Date(max_price_region$Date),
       xend=as.Date(max_price_region$Date),
       y=max_price_region$Index_regions,
       yend=100), 
    linetype = "dashed", color = "red") +
  
  scale_x_date(
    date_breaks = "1 year", 
    date_labels = "%Y", 
    expand = c(0,150))  + # this changes the x axis scale to date
  
  geom_point(
    data = x, 
    aes(x = Date, 
        y = Index), 
    color = "red", 
    size = 3) +
  
  geom_label(
    aes(x = as.Date("2003-06-07"), 
        y = 185, 
        label = "Regional Index", 
        color = "Black")) + 
  
  geom_label(
    aes(x = as.Date("2003-10-01"), 
        y = 207, 
        label = "20-City Index", 
        color = "Blue")) +
    scale_colour_manual(values=c("Blue", "Black")) +
  
  theme(legend.position="none")

Since 2000, The Case-Shiller Index has been a popular method of tracking home prices. The 20-city Index tracks real estate values in Atlanta, Boston, Charlotte, Chicago, Cleveland, Dallas, Denver, Detroit, Las Vegas, Los Angeles, Miami, Minneapolis, New York, Phoenix, Portland, San Diego, San Francisco, Seattle, Tampa and Washington, D.C. The regional index uses geographical regions (as shown below) to measure aggregate value. I use this regional index for the rest of the analysis due to the better representation of the entire US. Additionally, I only do comparisons when the Case-Shiller Index starts in 2000.

#####################################################
#this is the case shiller weighting system map



regions <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\states_and_regions.csv")


pacific <- regions %>% 
  filter(Division == "Pacific")


east_south_central <- regions %>% 
  filter(Division == "East South Central")


mountain <- regions %>% 
  filter(Division == "Mountain")


new_england <- regions %>% 
  filter(Division == "New England")

south_atlantic <- regions %>% 
  filter(Division == "South Atlantic")

west_north_central <- regions %>% 
  filter(Division == "West North Central")

east_north_central <- regions %>% 
  filter(Division == "East North Central")

middle_atlantic <- regions %>% 
  filter(Division == "Middle Atlantic")

west_south_central <- regions %>% 
  filter(Division == "West South Central")



all_states <- map_data("state") 
# Add more states to the lists if you want
states_pacific  <- pacific$State
states_esc  <- east_south_central$State
states_mountain <- mountain$State
states_ne <- new_england$State
states_sa <- south_atlantic$State
states_wnc <- west_north_central$State
states_enc <- east_north_central$State
states_ma <- middle_atlantic$State
states_wsc <- west_south_central$State


colors <- brewer.pal(10, "Set3")



region_plot <- ggplot(all_states, aes(x=long, y=lat, group = group)) +
  geom_polygon( colour = "grey") +
  
  geom_polygon(fill=colors[1], color = "white", data = filter(all_states, region %in% states_pacific)) +
  
  geom_polygon(fill= colors[3], color = "white", data = filter(all_states, region %in% states_esc)) +
  
  geom_polygon(fill= colors[4], color = "white", data = filter(all_states, region %in% states_mountain)) +
  
  geom_polygon(fill= colors[5], color = "white", data = filter(all_states, region %in% states_sa))  +
  
  geom_polygon(fill= colors[6], color = "white", data = filter(all_states, region %in% states_wnc)) +
  
  geom_polygon(fill=colors[7], color = "white", data = filter(all_states, region %in% states_enc))  +
  
  geom_polygon(fill=colors[8], color = "white", data = filter(all_states, region %in% states_ma))  +
  
  geom_polygon(fill=colors[9], color = "white", data = filter(all_states, region %in% states_wsc)) +
  
    geom_polygon(fill= colors[10], color = "white", data = filter(all_states, region %in% states_ne)) 
  
  
region_plot +
  labs(x = "", y ="", caption = "Source: US Census Bureau", title = "Case-Shiller Regions") +
  
   theme_minimal() +
   theme(aspect.ratio=1/1.9) +
  
  
theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
   theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())  +
  theme(plot.caption = element_text(hjust = 0)) +
 

  
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(),panel.border = element_rect(colour = "grey", fill=NA, size=2))  +
  annotate("text", x=  -121, y=44, label = "Pacific") + 
  annotate("text", x = -111, y=43, label = "Mountain") + 
  annotate("text", x = -98, y=45, label = "West North Central")  + 
  annotate("text", x = -97, y=33, label = "West South Central")  + 
  annotate("text", x = -85.5, y=41, label = "East North Central")  + 
  annotate("text", x = -87.5, y=35, label = "East\n South\n Central")  + 
  annotate("text", x = -77, y=42, label = "Middle\n Atlantic")  + 
  annotate("text", x = -80, y=35.5, label = "South\n Atlantic")  + 
  annotate("text", x = -70, y=45, label = "New\n England")  +
  
                    coord_map("polyconic") 

To get a sense of how massive the real estate industry is in the US, the following chart shows aggregate values for each of the designated regions.

Billions of dollars

case_shiller_aggregate <-read_csv( "C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\case_shiller_aggregate_regions.csv")


table1 <- as.data.frame(case_shiller_aggregate)

table1 <- table1 %>% 
  mutate_at(vars(-Region), funs(round(.,2)))

datatable1 <- datatable(table1, colnames = c('Region', '1990', '2000', '2010'), 
          options = list(dom = 't'),
          caption = htmltools::tags$caption(
           style = 'caption-side: bottom; text-align: center;',
           'Table 1: ', htmltools::em('Aggregate Home Values')))  

my_vals <- table1$Region


datatable1 %>% 
  formatCurrency(c('1990', '2000','2010'))
# this is the aggregate value of the single family housing stock
# https://www.spglobal.com/spdji/en/documents/methodologies/methodology-sp-corelogic-cs-home-price-indices.pdf

Due to the nature of indicies, there is weighting and averaging that may lead to an over or under estimate of home values in certain areas or regions in the US. I have provided two maps to provide a look beyond these aggregations.

state_house_prices <- read_csv( "C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\state_house_prices.csv") %>% 
   filter(year %in% c("2002Q1", "2003Q1", "2004Q1", "2005Q1", "2006Q1", "2007Q1", "2008Q1", "2009Q1", "2010Q1")) %>% 
   rename(state_abbr = State)

states <- us_states() %>% 
   filter(!state_name %in% c( "Alaska","Hawaii", "Puerto Rico")) %>% 
  select(state_abbr, geometry) 
    
############################

state_hp_dat <- inner_join(states, state_house_prices, by = "state_abbr")


ggplot() +
  geom_sf(data = state_hp_dat, aes(fill = median_price)) +
  facet_wrap(~year) +
  coord_sf(crs = st_crs(2163)) + 
  theme_bw() +
  scale_fill_continuous(low = "navy", high = "red", label = scales::comma) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank()) +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        plot.caption = element_text(hjust = 0)) +
  labs(fill='Price in 2019 dollars', caption = "Source: Federal Housing Finance Agency", title = "Median Home Prices by State (Quarter 1)")

counties_dat <- us_counties() %>% 
  filter(!state_name %in% c( "Alaska","Hawaii", "Puerto Rico","District of Columbia")) %>% 
  mutate(FIPS = paste(statefp,countyfp,sep = ""))

county_house_prices <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\county_house_prices.csv")

county_house_prices <- county_house_prices %>% 
 pivot_longer(
   cols = starts_with("6/30"),
   names_to = "year",
   values_to = "value",
   values_drop_na = TRUE) %>% 
  filter(year %in% c("6/30/2003", "6/30/2006", "6/30/2012"))


map_dat <- inner_join(counties_dat, county_house_prices, by = "FIPS") 

cols <- c(16,23:25)

map_dat <- map_dat[,cols]

  

pal <- colorNumeric(c("navy", "orange", "red"),
                    domain = min(map_dat$value):max(map_dat$value))



leaflet(width = "90%") %>% 
  setView(lng = -96, lat = 39, zoom = 4) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>%
  
  addPolygons(data = st_as_sf(filter(map_dat, year == "6/30/2003")),
              group = "2000",
              fillOpacity = .5,
              color = ~pal(value),
              label = ~paste("Median Home Price in ",RegionName,": $",value, sep = "")) %>% 
  addPolygons(data = st_as_sf(filter(map_dat, year == "6/30/2006")),
              group = "2006",
              fillOpacity = .5,
              color = ~pal(value),
              label = ~paste("Median Home Price in ",RegionName,": $",value, sep = "")) %>% 
  addPolygons(data = st_as_sf(filter(map_dat, year == "6/30/2012")),
              group = "2020",
              fillOpacity = .5,
              color = ~pal(value),
              label = ~paste("Median Home Price in ",RegionName,": $",value, sep = "")) %>% 
  
  addLayersControl(
    baseGroups = c("2000", "2006", "2020"),
    options = layersControlOptions(collapsed = FALSE))

Supply and Demand Drivers

What was driving force behind this large increase in home prices? In economics, price increases result from either

  1. A decrease in the supply (holding demand constant) or
  2. An increase in demand (holding supply constant)

I will look at a few supply and demand factors that may have contributed to the rapid inflation.

Population Growth

One possible driver of housing demand is an increase in the population. People need homes to live in and large increases in the number of people living in the US could increase demand for houses.

population <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Population.csv")

population2 <- population %>% 
  mutate(pct_change = (Population/lag(Population) - 1) * 100)


population2$Date <- as.Date(population$Date,
                        format = "%m/%d/%Y")


p1 <- ggplot(data = population2, aes(x = Date, y = Population)) +
  geom_line(size = 1) + 
  theme_bw() + 
  labs(y = "Population in thousands", x = "") +
  theme(panel.grid.minor = element_blank()) +
  scale_x_date(limits = c(as.Date("1999", "%Y"),as.Date("2006","%Y"))) +
  scale_y_continuous(label = comma, limits = c(280000,300000)) +
  geom_hline(yintercept = 282398, linetype = "dashed") +
  geom_hline(yintercept = 298818, linetype = "dashed", end = "2000") 



p2 <- ggplot(data = population2, aes(x = Date, y = pct_change)) +
  geom_bar(stat = "identity", width = 200) + 
  theme_bw() + 
  labs(y = "Annual Percent Change in Population", x = "") +
  coord_cartesian(ylim = c(-.5,1.25)) +
  scale_x_date(limits = c(as.Date("2000-01-01"),as.Date("2006-11-02")),
               expand = c(0,0)) +
  geom_hline(yintercept = 0) +
  theme(panel.grid.minor = element_blank()) 



  gridExtra::grid.arrange(
  p1,
  p2,
  nrow = 1,
  top = "US Population Growth",
  bottom = "Year")

Income

Another possible driver of housing demand is income level. If people’s income level increases more than the level of inflation, then more people might be able to afford a house.

median_income <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Median_Income.csv")

percap <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Per_Capita_Income.csv")


median_income$year <- as.character(median_income$year)
median_income$year <- as.Date(ISOdate(median_income$year, 1, 1))  # beginning of year

percap$Year <- as.character(percap$Year)
percap$Year <- as.Date(ISOdate(percap$Year, 1, 1))


median_income <- median_income %>% 
  mutate(pct_change = (census_median/lead(census_median) - 1) * 100)

percap <- percap %>% 
  mutate(pct_change = (CPI_2019/lead(CPI_2019) - 1) * 100)

############################################PLOTS

income_p1 <- ggplot() +
  geom_line(data = percap, 
            mapping = aes(x = percap$Year,
                          y = CPI_2019),
             color = "blue", 
             size = 1) +
  geom_line(data = median_income,
             mapping = aes(x = median_income$year,
                           y = median_income$census_median),
             color = "black", 
             size = 1) +
  theme_bw() +
  
  labs( y = "Dollars (2019 CPI)", x = "") +
  
  scale_x_date(limits = c(as.Date("1999", "%Y"),as.Date("2006","%Y"))) +
  
  geom_label(
    aes(x = as.Date("2003","%Y"), 
        y = 35000, 
        label = "Per Capita Income", 
        color = "Black")) +
  
  geom_label(
    aes(x = as.Date("2001","%Y"),
        y = 63000,
        label = "Median Household Income",
        color = "Blue")) + 
  theme(legend.position = "none") +
  
  scale_colour_manual(values=c("Blue", "Black")) +
  scale_y_continuous(label = comma)


income_p2 <- ggplot(data = median_income, aes(x = year, y = pct_change)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(y = "Percent Change", x = "") +
   scale_x_date(limits = c(as.Date("2000-01-01"),as.Date("2006-11-02")),
               expand = c(0,0)) +
  coord_cartesian(ylim = c(-4,1.5)) +
  geom_hline(yintercept = 0) +
  scale_y_continuous(breaks = round(seq(-4, 1, by = 1),1))




 gridExtra::grid.arrange(
  income_p1,
  income_p2,
  nrow = 1,
  top = "US Income", 
  bottom = "Year")

Housing Stock

The supply of houses can be represented by the number of houses that are currently occupiable in the US–the housing stock. Decreases in the housing stock would increase prices.

houses_stock <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Housing_Stock.csv")


houses_stock$Date <- as.Date(ISOdate(houses_stock$Date, 1, 1))




houses_stock <- houses_stock %>% 
  mutate(pct_change = (Stock/lag(Stock) - 1) * 100) 


############################################PLOT

stock_p1 <- ggplot(data = houses_stock, mapping = aes(x = Date, y = Stock)) + 
  geom_line(size = 1) + 
  theme_bw() +
  scale_x_date(limits = c(as.Date("2000-01-01"),as.Date("2006-01-01")),
               expand = c(.1, .1)) +
  coord_cartesian(ylim = c(115000, 130000)) +
  geom_hline(yintercept = 127825.8, linetype = "dashed") +
  geom_hline(yintercept = 116826.3, linetype = "dashed", end = "2000") +
  labs(y = "Total # of Houses", x = "") +
  scale_y_continuous(label = comma)


stock_p2 <- ggplot(data = houses_stock, aes(x = Date, y = pct_change)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(y = "Percent Change", x = "") +
  scale_x_date(limits = c(as.Date("2000-01-01"),as.Date("2006-11-02")),
               expand = c(0,0)) +
  coord_cartesian(ylim = c(-1,2)) +
  geom_hline(yintercept = 0)


 gridExtra::grid.arrange(
  stock_p1,
  stock_p2,
  nrow = 1,
  top = "Housing Stock",
  bottom = "Year")

Comparison

Finally, comparing these factors will give a general idea of what the dominating effect was.

comparisons <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\percent_changes.csv")

comparisons$side <- as.factor(comparisons$side)
comparisons$driver <- as.factor(comparisons$driver)

levels(comparisons$side) <- c("Demand", "Supply")
levels(comparisons$driver) <- c("Income", "Population", "Housing Stock")

ggplot(data = comparisons, aes(x = driver, y = pct_change, fill = driver))+
  geom_bar(stat = "identity", 
           position = "dodge", 
           color = "black",
           width = .8)+
  facet_grid(~side, scales = "free", space = "free",
             ) +
  scale_fill_manual(values = c("#fee0d2", "#addd8e","#31a354")) +
  theme_bw() +
  geom_hline(yintercept=0) +
   geom_label(aes(label = driver, y = pct_change - (.5 * pct_change)), position = position_dodge(width=.9), fill = "white") +
  theme(legend.position = "none") +
  labs(y = "Percent Change", x = "Driver", caption = "Source: FRED, US Census Bureau") +
  theme(plot.caption = element_text(hjust = 0))

Over a six year period, the net effect of population growth and income was significantly less than the growth of the housing stock. These factors then do not explain the increases in prices.

The Vicious Cycle

One important factor to consider is the evolution of mortgage-backed securities. Lenders would package many mortgages together and sell them as securities to large investment banks. This transfer of liability reduced risk for lenders. Because of reduced risk, lenders began to lower their standards on who they would lend money to.

lending <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\lending_standards.csv") 

lending$Date <- str_replace(lending$Date, ":", " ")


lending$Date <- zoo::as.yearqtr(lending$Date)
  


spline.d <- as.data.frame(spline(lending$Date, lending$standard_change))
spline.d2 <- as.data.frame(spline(lending$Date, lending$demand_change))

scaleFUN <- function(x) sprintf("%.2f", x)


ggplot(data = spline.d, aes(x = x, y  = y)) +
  geom_hline(yintercept = 0, alpha = .2) +
  geom_line(size = 1, aes(color = "Black")) + # standard change
  geom_line(data = spline.d2, aes(x = x, y = y, color = "Blue", alpha = .5), size = 1) + # demand change
  theme_bw() +
  labs(title = "Lending Standards", x = "Year", y = "Loosening                Tightening", caption = "Source: Federal Reserve Board Senior Loan Officer Opinion Survey") +
  theme(plot.caption = element_text(hjust = 0),
        axis.title.y = element_text(hjust=.4),
        legend.position = "none") +
  geom_label(
    aes(x = as.Date(2003), 
        y = -50, 
        label = "Change in Loan Demand", 
        color = "Blue")) +
  geom_label(
    aes(x = as.Date(2005), 
        y = 50, 
        label = "Change in Loan Standard", 
        color = "Black")) +
    scale_colour_manual(values=c("Blue", "Black")) +
  scale_x_continuous(limits = c(2000,2010), breaks = c(2000, 2002, 2004, 2006, 2008, 2010))

This figure plots the net share of banks reporting tighter/easier standards and stronger/weaker demand for loans. Though not mortgage specific, it still gives an idea to trends in lending standards. From 2000-2005, down-payments were reduced and banks began lending to people called sub-prime lendees–those that have poor credit scores and in other circumstances would not have been eligible for a mortgage. In addition to the lending standards, record low interest rates increased the affordability of mortgages.

mortgage_rate <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\mortgage_rate.csv") 

mortgage_rate$Date <- mdy(mortgage_rate$Date)



dat_xts <- mortgage_rate %>% 
  timetk::tk_xts(date = Date)





dygraph(data = dat_xts, main = "30-year Mortgage Rates", width="100%", height = 300) %>% 
   dyRangeSelector(dateWindow = c("1995-01-01", "2011-01-01")) %>% 
   dySeries("int_rate", label = "Int. Rate") %>%
   dyEvent("2006-07-01", "Peak Housing Prices", labelLoc = "top")  %>% 
  dyEvent("2000-01-01", "Start of Analysis", labelLoc = "top") %>% 
  dyShading("2000-01-01", "2006-07-01") %>% 
  dyRoller(rollPeriod = 5) %>% 
  dyLimit(5.25, "Record-low 5.27% Interest Rate Pre-housing bubble",color = "red") %>% 
  dyAxis("y", label = "Interest Rate (%)" ) %>% 
  dyAxis("x", label = "Year") %>% 
  dyOptions(axisLineWidth = 1.5, fillGraph = TRUE, drawGrid = FALSE) %>% 
  dyCSS("my.css")

This reduction in lending standards, and record low interest rates led many people to purchase homes. If home prices are steadily increasing, mortgage delinquency rates drop. If people can’t make mortgage payments, they can always sell their house for more than their mortgage is worth and won’t foreclose on the loan. Conversely, if house prices are falling, selling their house won’t fully cover the cost of repaying the mortgage and therefore giving up the house is the better option. This is illustrated by the relationship shown below.

del_rate <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\deliquency_rate.csv")

del_rate$date <- mdy(del_rate$date)

del_rate <- del_rate %>% 
  filter(date > "2000-01-01")


med_home_price <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\med_home_prices.csv")

case_shiller2 <- read_csv("C:\\Users\\Ryan\\Desktop\\Data Wrangling\\Data\\Case_Shiller.csv", col_names = c("Date", "Index_20", "Index_regions"), skip = 1) 

case_shiller2$Date <- mdy(case_shiller2$Date)

case_shiller2 <- case_shiller2 %>% 
  rename(date = Date)
  



cor_dat <- left_join(del_rate, case_shiller2, by = "date")




cor_dat$pct_del <- Lag(cor_dat$pct_del, 1)


cor_dat <- cor_dat %>% 
  
  mutate(pct_change_p = (cor_dat$Index_20 / lag(cor_dat$Index_20) - 1 ) * 100) %>% 
  mutate(pct_change_d = (pct_del / lag(pct_del) - 1 ) * 100)
  
 # filter(date > "2000-01-01" & date < "2010-01-01")



ggplot(data = cor_dat, aes(x = pct_change_d, y = pct_change_p)) +
  geom_point(alpha = .7) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  theme_bw() +
  labs(x = "Percent Change in Mortgage Delinquency", y = "Percent Change in Case-Shiller Index", title = "Relationship Between Home Prices and Delinquency Rates", caption = "Source: Board of Governors of the Federal Reserve System (US), S&P Dow Jones Indices LLC") +
  theme(plot.caption = element_text(hjust = 0))

Increased prices and falling delinquency rates gave many large investment banks the idea that all was well and expectations of future success grew. These expectations and the low-default rate reduced lending standards even further and thus we see the cycle exacerbated. I won’t detail the bursting of the housing bubble but essentially increased interest rates led to dip in the home prices. This shock to the cycle caused many mortgage backed securities to lose value which caused serious disruptions to the finance industry. Many people speculate on the specific causes of the housing bubble and subsequent collapse but this analysis provides one logical explanation.